Purpose: Retrieve full list of UK stations from http://orr.gov.uk/statistics/published-stats/station-usage-estimates
Create tables in the database and add data
In [1]:
import pandas as pd
pd.options.display.max_columns = 999
In [2]:
stations_df = pd.read_excel("http://orr.gov.uk/__data/assets/excel_doc/0019/20179/Estimates-of-Station-Usage-in-2014-15.xlsx",2)
In [3]:
stations_df.head()
Out[3]:
In [4]:
headers = [h.lower() for h in list(stations_df.columns)]
headers = [h.replace(" ", "_").replace("(", "").replace(")","") for h in headers]
stations_df.columns = headers
stations_df.head()
stations_df["london_or_gb"] = "gb"
stations_df.loc[stations_df["county_or_unitary_authority"] == "Greater London","london_or_gb"] = "london"
stations_df.head()
Out[4]:
In [5]:
# Now write out to postgres
from mylibrary.connections import conn, engine, cursor
stations_df.to_sql("all_stations", engine, schema="tt_gh", if_exists="replace", index=False)
In [6]:
# Create geometry column for the points including a spatial index for efficient querying
sql = """
SELECT AddGeometryColumn ('tt_gh', 'all_stations', 'geom', 27700, 'POINT', 2);
UPDATE tt_gh.all_stations SET geom = ST_GeomFromText('POINT(' || os_grid_easting || ' ' || os_grid_northing || ')', 27700 );
CREATE INDEX idx_geom_all_stations_points ON tt_gh.all_stations USING gist(geom);
"""
cursor.execute(sql)
conn.commit()
In [7]:
#Make a lat and lng column
sql = """
ALTER TABLE tt_gh.all_stations ADD lat float, ADD lng float, ADD icscode text,
ADD icscode_status text, ADD tfl_request text, ADD tfl_response json, ADD tfl_message text;
UPDATE tt_gh.all_stations SET
lng = ST_X(ST_TRANSFORM(geom, 4326)),
lat = ST_Y(ST_TRANSFORM(geom,4326));
ALTER TABLE tt_gh.all_stations ADD PRIMARY KEY (nlc);
"""
cursor.execute(sql)
conn.commit()
In [8]:
sql = """
select * from tt_gh.all_stations limit 5
"""
pd.read_sql(sql, conn)
Out[8]:
In [9]:
sql = """
select count(*), london_or_gb from tt_gh.all_stations group by london_or_gb
"""
pd.read_sql(sql, conn)
Out[9]: